22 May, 2017

suppressWarnings(library(tidyverse))
suppressWarnings(library(magrittr))
suppressWarnings(library(knitr))
suppressWarnings(library(broom))
suppressWarnings(library(arsRtools))

PROMPTs

deeptools_file_iasillo <- '/Users/schmidm/Documents/other_people_to_and_from/ClaudiaI/Effie_RNAseq_bws/RNAseq_deeptools_out/Chen_PROMPT_TSS_sensitivity_joined_sensitivity.gz'

deeptools_file_meola <- '/Users/schmidm/Documents/other_people_to_and_from/ClaudiaI/Effie_RNAseq_bws/RNAseq_deeptools_out_meola/Chen_PROMPT_TSS_sensitivity_joined_sensitivity.gz'

biotype <- 'PROMPT'
anchor <- 'TSS'

Deeptools to R

This part is not run here, just shown for Documentation. The resulting tidy data frame has been saved and is loaded in the next section.

#!/bin/sh

cd /home/schmidm/faststorage/ClaudiaI/Effie_RNAseq_bws

bash ~/ms_tools/MS_Metagene_Tools/RNAseq_sensitivityX.sh reference-point \
"/home/schmidm/annotations/hg19/Chen_etal/Chen_PROMPT_TSSs.bed" \
"*plus.bw" \
"*minus.bw" \
5000 5000 TSS PROMPTs "eGFP" "Ars2,Cbp80,Z18" \
"deeptools_out/Chen_PROMPT_TSS_sensitivity" "--binSize=50 --numberOfProcessors=4"


cd /home/schmidm/faststorage/ClaudiaI/Effie_RNAseq_bws/Meola_RNAseq_bws/

bash ~/ms_tools/MS_Metagene_Tools/RNAseq_sensitivityX.sh reference-point \
"/home/schmidm/annotations/hg19/Chen_etal/Chen_PROMPT_TSSs.bed" \
"*plus.bw" \
"*minus.bw" \
5000 5000 TSS PROMPTs "EGFP" "RRP40" \
"deeptools_out/Chen_PROMPT_TSS_sensitivity" "--binSize=50 --numberOfProcessors=4"

matrix files for iasillo: /Users/schmidm/Documents/other_people_to_and_from/ClaudiaI/Effie_RNAseq_bws/RNAseq_deeptools_out/Chen_PROMPT_TSS_sensitivity_joined_sensitivity.gz
matrix files for meola /Users/schmidm/Documents/other_people_to_and_from/ClaudiaI/Effie_RNAseq_bws/RNAseq_deeptools_out_meola/Chen_PROMPT_TSS_sensitivity_joined_sensitivity.gz
biotype: PROMPT
anchor: TSS

note for the PROMPTs the ids is simply the TSS information

(bin_values <- arsRtools::load_RNAseq_matrices(deeptools_file_iasillo, deeptools_file_meola))
bin_values_filename <- paste0('../data/RNAseq/RNAseq_bin_values_', biotype, '_', anchor, '.RData')

Saving bin values to file: ../data/RNAseq/RNAseq_bin_values_PROMPT_TSS.RData

save(bin_values, file=bin_values_filename)

Load the tidy data frame of single 50bp bin values

everything below is run.

load(bin_values_filename, verbose=TRUE)
## Loading objects:
##   bin_values

heatmaps scaled values

data('RNAseq_value_heatmap_theme', package='arsRtools')
row_order <- bin_values %>%
    group_by(id) %>% 
    summarize(total_value = sum(value)) %>% 
    ungroup %>% 
    arrange(total_value) %$% 
    id

bin_values %<>%
  mutate(id = factor(id, levels=row_order))
bin_values %>%
  filter(value > 0,
         rel_pos > -2000, rel_pos < 5000) %>%
  ggplot(., aes(x=rel_pos, y=id, fill=log2(value))) +
  geom_raster(interpolate = FALSE) +
  facet_grid(.~siRNA) +
  xlab(paste0('bp to ', biotype, ' ', anchor)) +
  RNAseq_value_heatmap_theme +
  theme(axis.text.y = element_blank())

log2 ratios

bin_sensitivities <- arsRtools::RNAseq_sensitivity_matrix(bin_values)

log2FC profiles

data('RNAseq_lineplot_theme', package='arsRtools')
bin_sensitivities %>%
  group_by(rel_pos, siRNA) %>%
  do(tidy(t.test(log2(.$value)))) %>%
  ggplot(., aes(x=rel_pos, y=estimate, color=siRNA)) +
  geom_ribbon(aes(ymin=conf.low, ymax=conf.high, fill=siRNA, color=NULL), alpha=.5) +
  geom_line(aes(color=siRNA)) +
  ylab('mean log2 fold-change relative control') +
  xlab(paste0('bp to ', biotype, ' ', anchor)) +
  RNAseq_lineplot_theme +
  xlim(-2000,5000)

bin_sensitivities %>%
  group_by(rel_pos, siRNA) %>%
  do(tidy(t.test(log2(.$value)))) %>%
  group_by(siRNA) %>%
  mutate(estimate = (estimate-min(estimate))/(max(estimate)-min(estimate))) %>%
  ggplot(., aes(x=rel_pos, y=estimate, color=siRNA)) +
  geom_line(aes(color=siRNA)) +
  ylab('mean log2 fold-change relative control') +
  xlab(paste0('bp to ', biotype, ' ', anchor)) +
  RNAseq_lineplot_theme + 
  xlim(-2000,5000)

Wilcoxon FDR plot

wilcox_fdr <- bin_sensitivities %>%
  group_by(rel_pos, siRNA) %>%
  do(tidy(wilcox.test(log2(.$value), alternative='greater'))) %>%
  mutate(padj = p.adjust(p.value, method='BH'))
ggplot(wilcox_fdr, aes(x=rel_pos, y=-log10(padj), color=siRNA)) +
  geom_line(aes(color=siRNA)) +
  geom_hline(yintercept = 1, color='orange', linetype=2) +
  facet_grid(siRNA~.) +
  ylab('-log10(fdr)') +
  xlab(paste0('bp to ', biotype, ' ', anchor))  +
  RNAseq_lineplot_theme +
  xlim(-2000,5000)

point of below FDR=1 ….

wilcox_fdr %>%
  group_by(siRNA) %>%
  filter(rel_pos > 0 & padj > .1) %>%
  summarize(min_rel_pos = min(rel_pos))
## # A tibble: 2 × 2
##    siRNA min_rel_pos
##   <fctr>       <dbl>
## 1   Ars2        4900
## 2    Z18        1300

heatmaps

data('RNAseq_logFC_heatmap_theme', package='arsRtools')
bin_sensitivities %>% 
  filter(rel_pos > -2000, rel_pos < 5000) %>%
  mutate(id = factor(id, levels=row_order)) %>%
  ggplot(., aes(x=rel_pos, y=id, fill=log2(value))) +
  geom_raster(interpolate = FALSE) +
  facet_grid(.~siRNA) +
  RNAseq_logFC_heatmap_theme +
  theme(axis.text.y = element_blank()) +
  xlab(paste0('bp to ', biotype, ' ', anchor))

bin_sensitivities %>% 
  mutate(value = case_when(.$value > 4 ~ 4,
                           .$value < .25 ~ .25,
                           TRUE ~ .$value)) %>%
  filter(rel_pos > -2000, rel_pos < 5000) %>%
  ggplot(., aes(x=rel_pos, y=id, fill=log2(value))) +
  geom_raster(interpolate = FALSE) +
  facet_grid(.~siRNA) +
  RNAseq_logFC_heatmap_theme +
  theme(axis.text.y=element_blank()) +
  xlab(paste0('bp to ', biotype, ' ', anchor))

eRNA

deeptools_file_iasillo <- '/Users/schmidm/Documents/other_people_to_and_from/ClaudiaI/Effie_RNAseq_bws/RNAseq_deeptools_out/Chen_eRNA_TSS_sensitivity_joined_sensitivity.gz'

deeptools_file_meola <- '/Users/schmidm/Documents/other_people_to_and_from/ClaudiaI/Effie_RNAseq_bws/RNAseq_deeptools_out_meola/Chen_eRNA_TSS_sensitivity_joined_sensitivity.gz'

biotype <- 'eRNA'
anchor <- 'TSS'

Deeptools to R

This part is not run here, just shown for Documentation. The resulting tidy data frame has been saved and is loaded in the next section.

#!/bin/sh

cd /home/schmidm/faststorage/ClaudiaI/Effie_RNAseq_bws

bash ~/ms_tools/MS_Metagene_Tools/RNAseq_sensitivityX.sh reference-point \
"/home/schmidm/annotations/hg19/Chen_etal/Chen_eRNA_TSSs_uniq.bed" \
"*plus.bw" \
"*minus.bw" \
5000 5000 TSS eRNAs "eGFP" "Ars2,Cbp20,Cbp80,NCBP3,Z18" \
"deeptools_out/Chen_eRNA_TSS_sensitivity" "--binSize=50 --numberOfProcessors=4"


cd /home/schmidm/faststorage/ClaudiaI/Effie_RNAseq_bws/Meola_RNAseq_bws/

bash ~/ms_tools/MS_Metagene_Tools/RNAseq_sensitivityX.sh reference-point \
"/home/schmidm/annotations/hg19/Chen_etal/Chen_eRNA_TSSs_uniq.bed" \
"*plus.bw" \
"*minus.bw" \
5000 5000 TSS eRNAs "EGFP" "RRP40" \
"deeptools_out/Chen_eRNA_TSS_sensitivity" "--binSize=50 --numberOfProcessors=4"

matrix files for iasillo: /Users/schmidm/Documents/other_people_to_and_from/ClaudiaI/Effie_RNAseq_bws/RNAseq_deeptools_out/Chen_eRNA_TSS_sensitivity_joined_sensitivity.gz
matrix files for meola /Users/schmidm/Documents/other_people_to_and_from/ClaudiaI/Effie_RNAseq_bws/RNAseq_deeptools_out_meola/Chen_eRNA_TSS_sensitivity_joined_sensitivity.gz
biotype: eRNA
anchor: TSS

note for the eRNAs the ids is useless (its actually the distance between forward and reverse TSS from Chen et al) so I replace it with something more informative

(bin_values <- arsRtools::load_RNAseq_matrices(deeptools_file_iasillo, deeptools_file_meola) %>%
  mutate(id=paste0(chr, ':', start, '-', end)))
bin_values_filename <- paste0('../data/RNAseq/RNAseq_bin_values_', biotype, '_', anchor, '.RData')

Saving bin values to file: ../data/RNAseq/RNAseq_bin_values_eRNA_TSS.RData

save(bin_values, file=bin_values_filename)

Load the tidy data frame of single 50bp bin values

everything below is run.

load(bin_values_filename, verbose=TRUE)
## Loading objects:
##   bin_values

heatmaps scaled values

row_order <- bin_values %>%
    group_by(id) %>% 
    summarize(total_value = sum(value)) %>% 
    ungroup %>% 
    arrange(total_value) %$% 
    id

bin_values %<>%
  mutate(id = factor(id, levels=row_order))
bin_values %>%
  filter(value > 0,
         rel_pos > -2000, rel_pos < 5000) %>%
  ggplot(., aes(x=rel_pos, y=id, fill=log2(value))) +
  geom_raster(interpolate = FALSE) +
  facet_grid(.~siRNA) +
  xlab(paste0('bp to ', biotype, ' ', anchor)) +
  RNAseq_value_heatmap_theme +
  theme(axis.text.y = element_blank())

log2 ratios

bin_sensitivities <- arsRtools::RNAseq_sensitivity_matrix(bin_values)

log2FC profiles

bin_sensitivities %>%
  group_by(rel_pos, siRNA) %>%
  do(tidy(t.test(log2(.$value)))) %>%
  ggplot(., aes(x=rel_pos, y=estimate, color=siRNA)) +
  geom_ribbon(aes(ymin=conf.low, ymax=conf.high, fill=siRNA, color=NULL), alpha=.5) +
  geom_line(aes(color=siRNA)) +
  ylab('mean log2 fold-change relative control') +
  xlab(paste0('bp to ', biotype, ' ', anchor)) +
  RNAseq_lineplot_theme +
  xlim(-2000,5000)

bin_sensitivities %>%
  group_by(rel_pos, siRNA) %>%
  do(tidy(t.test(log2(.$value)))) %>%
  group_by(siRNA) %>%
  mutate(estimate = (estimate-min(estimate))/(max(estimate)-min(estimate))) %>%
  ggplot(., aes(x=rel_pos, y=estimate, color=siRNA)) +
  geom_line(aes(color=siRNA)) +
  ylab('mean log2 fold-change relative control') +
  xlab(paste0('bp to ', biotype, ' ', anchor)) +
  RNAseq_lineplot_theme + 
  xlim(-2000,5000)

Wilcoxon FDR plot

wilcox_fdr <- bin_sensitivities %>%
  group_by(rel_pos, siRNA) %>%
  do(tidy(wilcox.test(log2(.$value), alternative='greater'))) %>%
  mutate(padj = p.adjust(p.value, method='BH'))
ggplot(wilcox_fdr, aes(x=rel_pos, y=-log10(padj), color=siRNA)) +
  geom_line(aes(color=siRNA)) +
  geom_hline(yintercept = 1, color='orange', linetype=2) +
  facet_grid(siRNA~.) +
  ylab('-log10(fdr)') +
  xlab(paste0('bp to ', biotype, ' ', anchor))  +
  RNAseq_lineplot_theme +
  xlim(-2000,5000)

point of below FDR=1 ….

wilcox_fdr %>%
  group_by(siRNA) %>%
  filter(rel_pos > 0 & padj > .1) %>%
  summarize(min_rel_pos = min(rel_pos))
## # A tibble: 1 × 2
##    siRNA min_rel_pos
##   <fctr>       <dbl>
## 1    Z18        1350

heatmaps

bin_sensitivities %>% 
  filter(rel_pos > -2000, rel_pos < 5000) %>%
  mutate(id = factor(id, levels=row_order)) %>%
  ggplot(., aes(x=rel_pos, y=id, fill=log2(value))) +
  geom_raster(interpolate = FALSE) +
  facet_grid(.~siRNA) +
  RNAseq_logFC_heatmap_theme +  
  xlab(paste0('bp to ', biotype, ' ', anchor))  +
  theme(axis.text.y = element_blank())

bin_sensitivities %>% 
  mutate(value = case_when(.$value > 4 ~ 4,
                           .$value < .25 ~ .25,
                           TRUE ~ .$value)) %>%
  filter(rel_pos > -2000, rel_pos < 5000) %>%
  ggplot(., aes(x=rel_pos, y=id, fill=log2(value))) +
  geom_raster(interpolate = FALSE) +
  facet_grid(.~siRNA) +
  RNAseq_logFC_heatmap_theme +  
  xlab(paste0('bp to ', biotype, ' ', anchor))  +
  theme(axis.text.y = element_blank())

sessionInfo()
## R version 3.2.3 (2015-12-10)
## Platform: x86_64-apple-darwin13.4.0 (64-bit)
## Running under: OS X 10.11.6 (El Capitan)
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats4    parallel  stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] arsRtools_0.1        RColorBrewer_1.1-2   rtracklayer_1.30.4  
##  [4] GenomicRanges_1.22.4 GenomeInfoDb_1.6.3   IRanges_2.4.8       
##  [7] S4Vectors_0.8.11     BiocGenerics_0.16.1  broom_0.4.1         
## [10] knitr_1.15.1         magrittr_1.5         dplyr_0.5.0         
## [13] purrr_0.2.2          readr_1.0.0          tidyr_0.6.0         
## [16] tibble_1.2           ggplot2_2.2.0        tidyverse_1.0.0     
## 
## loaded via a namespace (and not attached):
##  [1] Rcpp_0.12.8                futile.logger_1.4.3       
##  [3] XVector_0.10.0             plyr_1.8.4                
##  [5] futile.options_1.0.0       bitops_1.0-6              
##  [7] zlibbioc_1.16.0            tools_3.2.3               
##  [9] digest_0.6.10              evaluate_0.10             
## [11] gtable_0.2.0               nlme_3.1-128              
## [13] lattice_0.20-34            psych_1.6.9               
## [15] DBI_0.5-1                  yaml_2.1.14               
## [17] stringr_1.1.0              Biostrings_2.38.4         
## [19] rprojroot_1.1              grid_3.2.3                
## [21] Biobase_2.30.0             R6_2.2.0                  
## [23] BiocParallel_1.4.3         XML_3.98-1.5              
## [25] foreign_0.8-67             rmarkdown_1.2             
## [27] lambda.r_1.1.9             reshape2_1.4.2            
## [29] GenomicAlignments_1.6.3    Rsamtools_1.22.0          
## [31] backports_1.0.4            scales_0.4.1              
## [33] htmltools_0.3.5            SummarizedExperiment_1.0.2
## [35] assertthat_0.1             mnormt_1.5-5              
## [37] colorspace_1.3-2           labeling_0.3              
## [39] stringi_1.1.2              RCurl_1.95-4.8            
## [41] lazyeval_0.2.0             munsell_0.4.3